{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import face_recognition\n",
    "from face_recognition import face_locations\n",
    "import os\n",
    "import pandas as pd\n",
    "import torch\n",
    "import h5py\n",
    "import scipy.io\n",
    "import numpy as np\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Directory holding the image files; each file name from the label table index is joined onto it when images are loaded.\n",
    "img_dir = '/mnt/hdd1/data/face/LFWA/cropped/original/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Locations of the LFWA annotation files (MATLAB .mat format).\n",
    "label_mat = '/mnt/hdd1/data/face/LFWA/label.mat'\n",
    "name_mat = '/mnt/hdd1/data/face/LFWA/name.mat'\n",
    "attr_name_mat = '/mnt/hdd1/data/face/LFWA/attrname.mat'\n",
    "\n",
    "# Attribute matrix stored under the 'label' key.\n",
    "label = scipy.io.loadmat(label_mat)['label']\n",
    "\n",
    "# Each entry of 'name' wraps a backslash-separated path; keep its second component.\n",
    "name_cells = scipy.io.loadmat(name_mat)['name'].tolist()[0]\n",
    "name = []\n",
    "for cell in name_cells:\n",
    "    name.append(cell[0].split('\\\\')[1])\n",
    "\n",
    "# Attribute names stored under the 'AttrName' key, converted to plain str.\n",
    "attr_cells = scipy.io.loadmat(attr_name_mat)['AttrName'].tolist()[0]\n",
    "attr_name = [str(cell[0]) for cell in attr_cells]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Attribute table: rows are keyed by the parsed file names, columns by the attribute names.\n",
    "df_label = pd.DataFrame(data=label, index=name, columns=attr_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13143"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row count of the label table.\n",
    "df_label.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 13143/13143 [09:54<00:00, 22.12it/s]\n"
     ]
    }
   ],
   "source": [
    "# Compute one face embedding per image listed in df_label and export the\n",
    "# embeddings together with the matching label rows as CSV files.\n",
    "vecs = []\n",
    "fnames = []\n",
    "for fname in tqdm(df_label.index):\n",
    "    img_path = os.path.join(img_dir, fname)\n",
    "    X_img = face_recognition.load_image_file(img_path)\n",
    "    X_faces_loc = face_locations(X_img)\n",
    "    # Skip images where the detector found zero or more than one face.\n",
    "    if len(X_faces_loc) != 1:\n",
    "        continue\n",
    "    # Exactly one location is passed in, so take the single encoding returned for it.\n",
    "    faces_encoding = face_recognition.face_encodings(X_img, known_face_locations=X_faces_loc)[0]\n",
    "\n",
    "    vecs.append(faces_encoding)\n",
    "    fnames.append(fname)\n",
    "\n",
    "# Keep only the label rows that have an embedding and sort both tables by\n",
    "# file name so that row k of one CSV corresponds to row k of the other.\n",
    "# sort_index() returns new frames, so nothing is mutated in place.\n",
    "df_feat = pd.DataFrame(vecs, index=fnames).sort_index()\n",
    "df_label = df_label[df_label.index.isin(df_feat.index)].sort_index()\n",
    "assert df_feat.index.equals(df_label.index), 'feature/label rows are misaligned'\n",
    "\n",
    "df_feat.to_csv('/mnt/hdd1/data/face/LFWA/feature.csv')\n",
    "df_label.to_csv('/mnt/hdd1/data/face/LFWA/label.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
